#############################################################################################
### Competitive Intervention, Protracted Conflict, and the Global Prevalence of Civil War ###
#############################################################################################
###################################### Replication Code #####################################
#############################################################################################
######################### Noel Anderson (noel.anderson@utoronto.ca) #########################
############################################################################################# 

# Load required packages
library(foreign) # To load .dta files in R
library(survival) # For duration analysis
library(coxphf) # For Firth penalized likelihood estimation
library(plyr) # For count() function
library(ggplot2) # For plotting and graphs
library(extrafont) # For plotting with better fonts in R

# Load the datasets (set target to the location where you saved the datasets)
conflict.list <- read.dta("<insert file location>/conflict_list.dta") # For figure 1
data <- read.dta("<insert file location>/dataset.dta") # For main analyses (Table 2 & 3)
data.25 <- read.dta("<insert file location>/data_25.dta") # For appendix B, 25 battle death threshold (no cumulative requirement)
data.2year <- read.dta("<insert file location>/data_2year.dta") # For appendix C, two-year intermittency rule
ci.start.year <- read.dta("<insert file location>/ci_start_year.dta") # For appendix E, figure A.1

###############
## Figure 1 ###
###############

# Count total conflicts, new conflicts, and wars ending for each year and then combine in a trends dataset
total.count <- count(conflict.list, vars="year")
total.start <- count(conflict.list, vars="year", "started")
total.end <- count(conflict.list, vars="year", "ended")
trend.data = cbind(total.count$year, total.count$freq, total.start$freq, total.end$freq)
colnames(trend.data) <- c("year","total","start","end")
trend.data <- as.data.frame(trend.data)

# Plot the trends data
dev.off()
fig1 <- ggplot(trend.data) + xlab("Year") + ylab("Number of Conflicts") +  
  scale_x_continuous(breaks = seq(1945,2010,5), minor_breaks=seq(1945,2010,5)) +
  geom_vline(xintercept = 1991, linetype = "dashed") +
  stat_smooth(method=loess, size = 0.7, aes(x=year, y=total, colour="loess",linetype = "loess"), se=FALSE) +
  geom_line(aes(x=year, y=total, colour="total", linetype="total"), size=0.7) +
  geom_line(aes(x=year, y=start, colour="start",linetype = "start"), size = 0.7) +
  geom_line(aes(x=year, y=end, colour="end",linetype = "end"), size = 0.7) +
  scale_colour_manual(name="", 
                      breaks=c("loess","total","start","end"), 
                      values=c("black","black","grey60","black"), 
                      labels=c("Loess","Ongoing","Outbreaks","Terminations")) +
  scale_linetype_manual(name="", 
                        breaks=c("loess","total","start","end"), 
                        values=c("dashed","dotted","solid","solid"), 
                        labels=c("Loess","Ongoing","Outbreaks","Terminations")) +  
  theme_bw() +
  theme(legend.title=element_blank(),  legend.text=element_text(size=16, family="Liberation Serif"),
        legend.position=c(.175, .8),
        legend.key.height = unit(1, "cm"),
        legend.key.width = unit(1.5, "cm"),
        axis.text.x = element_text(colour="black", size=13, family="Liberation Serif"), 
        axis.text.y = element_text(colour="black", size=13, family="Liberation Serif"), 
        axis.title=element_text(colour="black", size=16, family="Liberation Serif"),
        legend.background = element_rect(fill="white",colour="black", size=.5, linetype="solid"))
fig1

# Calculations of average number of outbreaks, Cold War vs. post-Cold War period
num.cw.years <- unique(length(which(total.count$year<1991)))
num.postcw.years <- unique(length(which(total.count$year>1990)))

cw.starts <- total.start[which(total.count$year<1991),]
cw.starts <- sum(cw.starts$freq)
cw.start.avg <- cw.starts/num.cw.years
cw.start.avg

postcw.starts <- total.start[which(total.count$year>1990),]
postcw.starts <- sum(postcw.starts$freq)
postcw.start.avg <- postcw.starts/num.postcw.years
postcw.start.avg 

###############
## Figure 2 ###
###############

# Count number of conflicts for each year
total.con <- count(data, vars="year")

# Create function to get percentages of conflicts with X type of military aid flows
counter <- function(x){
  store <- as.data.frame(table(data$year, x, dnn=c("year","count")))
  store <- store[which(store$count==1),]
  percent <- store$Freq/total.con$freq}

ci <- counter(data$ci)
ci <- as.data.frame(ci)
rebonly <- counter(data$rebonly)
rebonly <- as.data.frame(rebonly)
govonly <- counter(data$govonly)
govonly <- as.data.frame(govonly)
type <- cbind(total.con, ci, rebonly, govonly)

fig2 <- ggplot(type, aes(x=year, y=ci)) + xlab("Year") + ylab("Percent of Conflicts") +  
  scale_x_continuous(breaks = seq(1975,2010,5), minor_breaks=seq(1975,2010,5)) +
  scale_y_continuous(breaks = seq(0,0.65,0.05), limits=c(-0.001, 0.621), minor_breaks=seq(0,0.6,0.05)) +
  geom_vline(xintercept = 1991, linetype = "dashed") +
  geom_point(aes(x=year, y=rebonly, shape="rebonly",colour="rebonly"), size=2) +
  geom_smooth(method = "gam", formula = y ~ s(x, k = 4), size = 0.7, aes(x=year, y=rebonly, colour="rebonly"), se=FALSE) +
  geom_point(aes(x=year+0.05, y=govonly+0.001, shape="govonly",colour="govonly"), size=2) +
  geom_smooth(method = "gam", formula = y ~ s(x, k = 4), size = 0.7, aes(x=year, y=govonly, colour="govonly"), se=FALSE) +
  geom_point(aes(x=year, y=ci, shape="ci",colour="ci"), size=2) +
  geom_smooth(method = "gam", formula = y ~ s(x, k = 4), size = 0.7, aes(x=year, y=ci, colour="ci"), se=FALSE) +
  scale_shape_manual(name="", 
                     breaks=c("ci","rebonly","govonly"),
                     values=c(16, 17, 15), 
                     labels=c("Competitive Intervention   ","Rebels Only   ","Government Only   ")) +
  scale_colour_manual(name="", 
                      breaks=c("ci","rebonly","govonly"), 
                      values=c("black", "grey35","grey70"), 
                      labels=c("Competitive Intervention   ","Rebels Only   ","Government Only   ")) +
  theme_bw() +
  theme(legend.position="bottom", legend.text=element_text(size=16, family="Liberation Serif"), legend.key.width=unit(1,"cm"),
        legend.background = element_rect(fill="white",colour=NA),
        axis.text.x = element_text(colour="black", size=13, family="Liberation Serif"), 
        axis.text.y = element_text(colour="black", size=13, family="Liberation Serif"), 
        axis.title=element_text(colour="black", size=16, family="Liberation Serif"))
fig2

# Calculations of averages in different periods
mean(type$ci[type$year<1991])
mean(type$ci[type$year>1990])

us_ussr <- counter(data$us_ussr)
us_ussr <- as.data.frame(us_ussr)
otherci <- counter(data$otherci)
otherci <- as.data.frame(otherci)
type <- cbind(type, us_ussr, otherci)
mean(type$us_ussr[type$year<1991])
mean(type$otherci[type$year<1991])

# t-test
t.test(type$ci[type$year<1991],type$ci[type$year>1990], alternative="two.sided")

#############
## Table 2 ##
#############

model1 <- coxph(Surv(t0, t1, ended) ~ coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data)
model1 

model2 <- coxph(Surv(t0, t1, ended) ~ ci + cluster(country), data=data)
model2

model3 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + cluster(country), data=data)
model3

model4 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + cluster(country), data=data)
model4

model5 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data)
model5 

model6 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data)
model6

#############
## Table 3 ##
#############

model7 <- coxph(Surv(t0, t1, ended) ~ allci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data)
model7

model8 <- coxph(Surv(t0, t1, ended) ~ ci + nosupport + govonly + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data)
model8

model9 <- coxph(Surv(t0, t1, ended) ~ ci + nosupport + rebonly + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data)
model9

model10 <- coxph(Surv(t0, t1, ended) ~ us_ussr + otherci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data) 
model10           

model11 <- coxphf(Surv(t0, t1, ended) ~ us_ussr + otherci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong, firth=TRUE, pl=TRUE, maxit=125, data=data) 
model11

##############################
########## APPENDIX ##########
##############################

####################################
## Appendix A: Summary Statistics ##
####################################

###############
## Table A.1 ##
###############

# Create function to get summary statistics
summary.stats <- function(x){
  mean <- mean(x)
  sd <- sd(x)
  min <- min(x)
  max <- max(x)
  cbind(mean, sd, min, max)
}

# Summary statistics, models 1-7
round(summary.stats(data$ci), 3)
round(summary.stats(data$coldwar), 3)
round(summary.stats(data$loglaggdppercapppp), 3)
round(summary.stats(data$demolag), 3)
round(summary.stats(data$oil), 3)
round(summary.stats(data$logpop), 3)
round(summary.stats(data$mountains), 3)
round(summary.stats(data$ethnicconflict), 3)
round(summary.stats(data$secessionistconflict), 3)
round(summary.stats(data$multiparty), 3)
round(summary.stats(data$unpk), 3)
round(summary.stats(data$terrcontrol), 3)
round(summary.stats(data$parity), 3)
round(summary.stats(data$rebstrong), 3)

# Additional variables, model 8-11
round(summary.stats(data$allci), 3)
round(summary.stats(data$nosupport), 3)
round(summary.stats(data$govonly), 3)
round(summary.stats(data$rebonly), 3)
round(summary.stats(data$us_ussr), 3)
round(summary.stats(data$otherci), 3)

###################################################
## Appendix B: 25 Battle Death Threshold Results ##
###################################################

###############
## Table A.2 ##
###############

model1 <- coxph(Surv(t0, t1, ended) ~ coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.25)
model1 

model2 <- coxph(Surv(t0, t1, ended) ~ ci + cluster(country), data=data.25)
model2

model3 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + cluster(country), data=data.25)
model3

model4 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + cluster(country), data=data.25)
model4

model5 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.25)
model5 

model6 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.25)
model6

###############
## Table A.3 ##
###############

model7 <- coxph(Surv(t0, t1, ended) ~ allci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.25)
model7

model8 <- coxph(Surv(t0, t1, ended) ~ ci + nosupport + govonly + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.25)
model8

model9 <- coxph(Surv(t0, t1, ended) ~ ci + nosupport + rebonly + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.25)
model9

model10 <- coxph(Surv(t0, t1, ended) ~ us_ussr + otherci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.25) 
model10           

data.25 <- na.omit(data.25) # Listwise deletion of observations with missing data (the coxphf package cannot handle NAs)
model11 <- coxphf(Surv(t0, t1, ended) ~ us_ussr + otherci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong, firth=TRUE, pl=TRUE, maxit=125, data=data.25) 
model11

#####################################################
## Appendix C: Two-Year Intermittency Rule Results ##
#####################################################

###############
## Table A.4 ##
###############

model1 <- coxph(Surv(t0, t1, ended) ~ coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.2year)
model1 

model2 <- coxph(Surv(t0, t1, ended) ~ ci + cluster(country), data=data.2year)
model2

model3 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + cluster(country), data=data.2year)
model3

model4 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + cluster(country), data=data.2year)
model4

model5 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.2year)
model5 

model6 <- coxph(Surv(t0, t1, ended) ~ ci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.2year)
model6

###############
## Table A.5 ##
###############

model7 <- coxph(Surv(t0, t1, ended) ~ allci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.2year)
model7

model8 <- coxph(Surv(t0, t1, ended) ~ ci + nosupport + govonly + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.2year)
model8

model9 <- coxph(Surv(t0, t1, ended) ~ ci + nosupport + rebonly + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.2year)
model9

model10 <- coxph(Surv(t0, t1, ended) ~ us_ussr + otherci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong + cluster(country), data=data.2year) 
model10           

model11 <- coxphf(Surv(t0, t1, ended) ~ us_ussr + otherci + coldwar + loglaggdppercapppp + demolag + oil + logpop + mountains + ethnicconflict + secessionistconflict + multiparty + unpk + terrcontrol + parity + rebstrong, firth=TRUE, pl=TRUE, maxit=125, data=data.2year) 
model11

#######################################################
## Appendix E: Addressing Reverse Causality Concerns ##
#######################################################

################
## Figure A.1 ##
################

figa1 <- ggplot(data=ci.start.year, aes(x=year, y=ci_starts)) + 
  xlab("Year of Competitive Intervention Onset") + ylab("Number of Civil Wars") +
  scale_y_continuous(breaks = seq(0,30,5), limits=c(0, 30))+
  geom_bar(stat="identity", width=0.6, colour="black",fill="grey80") + 
  annotate("text", x = 1.01, y = 13.5, label = "66%",size=6.5,colour="black", family="Liberation Serif") +
  annotate("text", x = 2.01, y = 3.5, label = "17%",size=6.5,colour="black", family="Liberation Serif") +
  annotate("text", x = 3.01, y = 2, label = "10%",size=6.5,colour="black", family="Liberation Serif") +
  annotate("text", x = 4.01, y = 1, label = "0%",size=6.5,colour="black", family="Liberation Serif") +
  annotate("text", x = 5.01, y = 1.5, label = "7%",size=6.5,colour="black", family="Liberation Serif") +
  theme_bw() +
  theme(legend.background = element_rect(fill="white",colour=NA),
        axis.text.x = element_text(colour="black", size=13, family="Liberation Serif"), 
        axis.text.y = element_text(colour="black", size=13, family="Liberation Serif"), 
        axis.title=element_text(colour="black", size=16, family="Liberation Serif"))
figa1